# Code for calculating proportions discussed in figures 1,2,3

library(readr)
library(dplyr)
library(tidyr)

# Load the character-level data and keep only the rows used downstream:
#   * character gender is not "U",
#   * predicted author gender is not "U", 0 or "#N/A",
#   * publication date is 1850 or later (rows with a missing date are dropped
#     because filter() discards NA conditions).
# NOTE(review): `%in%` never returns NA, so rows whose gender value is missing
# pass the first two conditions -- confirm that is intended.
# NOTE(review): the input path is absolute and machine-specific.
character_gender <- read_csv(
  "/Users/cheng/OneDrive/Desktop/character/character/1_character_gender/1_output.csv"
) %>%
  filter(
    !(character_gender %in% "U"),
    !(predicted_author_gender %in% c("U", 0, "#N/A")),
    publ_date >= 1850
  )


# Find the percentage of characterization that is bodily description,
# per publication date and character gender.

# Total `count` of rows tagged "anatomical" in each date/gender group.
character_body_wc <- character_gender %>%
  filter(characterization %in% "anatomical") %>%
  group_by(publ_date, character_gender) %>%
  summarise(body_wc = sum(count), .groups = "drop")

# Total `count` over all characterization rows in each date/gender group.
character_all_wc <- character_gender %>%
  group_by(publ_date, character_gender) %>%
  summarise(all_wc = sum(count), .groups = "drop")

# Start from the overall totals so every date/gender group is kept. A group
# with no "anatomical" rows has no match in character_body_wc and the join
# leaves body_wc as NA; that group really has 0 anatomical count, so fill it
# with 0 to get a 0% share instead of a missing value.
character_proportion <- character_all_wc %>%
  left_join(character_body_wc, by = c("publ_date", "character_gender")) %>%
  mutate(
    body_wc = replace_na(body_wc, 0),
    percentage = 100 * (body_wc / all_wc)
  )

# Save data for Figure 1
write_csv(character_proportion, "Figure1Data")

# Repeat the bodily-description percentage, additionally split by the
# predicted gender of the author.

# Total `count` of rows tagged "anatomical" per date/author gender/character
# gender group.
character_auth_body_wc <- character_gender %>%
  filter(characterization %in% "anatomical") %>%
  group_by(publ_date, predicted_author_gender, character_gender) %>%
  summarise(body_wc = sum(count), .groups = "drop")

# Total `count` over all characterization rows for the same grouping.
character_auth_all_wc <- character_gender %>%
  group_by(publ_date, predicted_author_gender, character_gender) %>%
  summarise(all_wc = sum(count), .groups = "drop")

# Start from the overall totals so every group is kept. Groups with no
# "anatomical" rows come out of the join with body_wc = NA; they have 0
# anatomical count, so fill with 0 to get a 0% share rather than NA.
character_author_proportion <- character_auth_all_wc %>%
  left_join(
    character_auth_body_wc,
    by = c("publ_date", "predicted_author_gender", "character_gender")
  ) %>%
  mutate(
    body_wc = replace_na(body_wc, 0),
    percentage = 100 * (body_wc / all_wc)
  )

# Rows for authors predicted to be male.
m_auth_prop <- character_author_proportion %>%
  filter(predicted_author_gender %in% "M")

# Rows for authors predicted to be female.
f_auth_prop <- character_author_proportion %>%
  filter(predicted_author_gender %in% "F")

# Save data for Figure 2 (female authors) & Figure 3 (male authors).
# The original called `wire_csv`, which does not exist and stopped the script
# before either file was written.
write_csv(f_auth_prop, "Figure2Data")
write_csv(m_auth_prop, "Figure3Data")